'''
Exercise done in PyCharm for logic and in a Jupyter notebook for IPython viewing.

ISSUE: sanitized is probably broken
'''
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

# Configure the matplotlib backend as plotting inline in IPython:
'''%matplotlib inline'''
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
pickle_file_sanitized = 'notMNIST_sanitized.pickle'

with open(pickle_file_sanitized, 'rb') as f:
    save_san = pickle.load(f)
    train_dataset_sanitized = save_san['train_dataset']
    train_labels_sanitized = save_san['train_labels']
    valid_dataset_sanitized = save_san['valid_dataset']
    valid_labels_sanitized = save_san['valid_labels']
    test_dataset_sanitized = save_san['test_dataset']
    test_labels_sanitized = save_san['test_labels']
image_size = 28
num_labels = 10

def reformat(dataset, labels):
    # Flatten each 28x28 image into a 784-element row vector
    # (3D -> 2D with an index dimension).
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...] (one-hot encoding).
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
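
# Quick sanity check of the one-hot trick above, with toy values chosen for
# illustration: broadcasting labels[:, None] against np.arange(num_labels)
# yields a boolean matrix with exactly one True per row.
_demo = (np.arange(3) == np.array([0, 2, 1])[:, None]).astype(np.float32)
assert (_demo == np.array([[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]])).all()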
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('---\nTraining set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

train_dataset_sanitized, train_labels_sanitized = reformat(train_dataset_sanitized, train_labels_sanitized)
valid_dataset_sanitized, valid_labels_sanitized = reformat(valid_dataset_sanitized, valid_labels_sanitized)
test_dataset_sanitized, test_labels_sanitized = reformat(test_dataset_sanitized, test_labels_sanitized)
print('---\nTraining set (sanitized)', train_dataset_sanitized.shape, train_labels_sanitized.shape)
print('Validation set (sanitized)', valid_dataset_sanitized.shape, valid_labels_sanitized.shape)
print('Test set (sanitized)', test_dataset_sanitized.shape, test_labels_sanitized.shape)
# With gradient descent training, even this much data is prohibitive.
# Subset the training data for faster turnaround.
train_subset = 10000  # gets ~10.5%, as good as a random pick

learning_rate = 0.01

# A Graph is a set of tf.Operation objects representing units of computation
# and tf.Tensor objects representing the data that flows between operations.
# Important note: this class is not thread-safe for graph construction. All
# operations should be created from a single thread, or external
# synchronization must be provided.
graph = tf.Graph()
with graph.as_default():
    # Input data.
    # Load the training, validation and test data into constants that are
    # attached to the graph. A constant is created from a value, with an
    # optional dtype and shape.
    tf_train_dataset = tf.constant(train_dataset[:train_subset, :])
    '''
    # Constant 1-D tensor populated with a value list.
    tensor = tf.constant([1, 2, 3, 4, 5, 6, 7])  # => [1 2 3 4 5 6 7]

    # Constant 2-D tensor populated with the scalar value -1.
    tensor = tf.constant(-1.0, shape=[2, 3])  # => [[-1. -1. -1.]
                                              #     [-1. -1. -1.]]
    '''
    tf_train_labels = tf.constant(train_labels[:train_subset])
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    '''The above does not change during training.'''
    # Variables.
    # These are the parameters that we are going to be training. The weight
    # matrix will be initialized using random values following a (truncated)
    # normal distribution. The biases get initialized to zero.
    # A Variable maintains state across calls to run(). It takes an initial
    # value, which can be a tensor of any type; after construction, its type
    # and shape are fixed.
    weights = tf.Variable(
        tf.truncated_normal([image_size * image_size, num_labels]))
    # truncated_normal draws random values from a normal distribution,
    # redrawing any value more than two standard deviations from the mean.
    biases = tf.Variable(tf.zeros([num_labels]))  # variables are attached to the graph
    '''These will be adjusted during training.'''
    # Training computation.
    # We multiply the inputs with the weight matrix, and add biases. We compute
    # the softmax and cross-entropy (it's one operation in TensorFlow, because
    # it's very common, and it can be optimized). We take the average of this
    # cross-entropy across all training examples: that's our loss.
    logits = tf.matmul(tf_train_dataset, weights) + biases  # matrix-multiply the training data by the weights and add the biases
    '''
    The logit function is the inverse of the sigmoidal "logistic" function:
    for a probability p, the logit gives the log-odds, i.e. log(p / (1 - p)).
    https://stackoverflow.com/questions/41455101/what-is-the-meaning-of-the-word-logits-in-tensorflow
    Logit is a function that maps probabilities [0, 1] to [-inf, +inf].
    Softmax is a function that maps [-inf, +inf] to [0, 1], similar to the
    sigmoid, but softmax also normalizes the sum of the values (the output
    vector) to be 1.
    TensorFlow "with logits": it means you are applying a softmax function to
    logit numbers to normalize them. The input vector (the logits) is not
    normalized and can range over [-inf, +inf].
    '''
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))  # deprecated variant
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=tf_train_labels))  # cross-entropy between logits and labels; expects unscaled logits, since it applies softmax internally for efficiency
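    # A minimal numpy sketch (illustrative values, not part of the graph) of
    # what the fused op computes for one example: softmax over the raw logits,
    # then cross-entropy against the one-hot label.
    #   example_logits = np.array([2.0, 1.0, 0.1])
    #   probs = np.exp(example_logits) / np.sum(np.exp(example_logits))  # softmax, sums to 1
    #   one_hot = np.array([1.0, 0.0, 0.0])
    #   xent = -np.sum(one_hot * np.log(probs))  # ~0.417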
    # Optimizer.
    # We are going to find the minimum of this loss using gradient descent.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)  # gradient descent with the given learning rate; minimize() takes the tensor whose value should be minimized
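    # For reference, minimize(loss) is roughly shorthand for the two-step form
    # of the tf.train.Optimizer API (sketch only, not used here):
    #   grads_and_vars = optimizer_instance.compute_gradients(loss)
    #   update_op = optimizer_instance.apply_gradients(grads_and_vars)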
    # Predictions for the training, validation, and test data.
    # These are not part of training, but merely here so that we can report
    # accuracy figures as we train.
    train_prediction = tf.nn.softmax(logits)  # computes softmax activations, returning a tensor of the same shape as logits
    valid_prediction = tf.nn.softmax(
        tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 801

def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])
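
# Quick sanity check of accuracy() on toy values chosen for illustration:
# the first prediction's argmax matches its label, the second does not,
# so the score should be 50%.
assert accuracy(np.array([[0.9, 0.1], [0.8, 0.2]]),
                np.array([[1.0, 0.0], [0.0, 1.0]])) == 50.0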
with tf.Session(graph=graph) as session:
    # This is a one-time operation which ensures the parameters get initialized as
    # we described in the graph: random weights for the matrix, zeros for the
    # biases.
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in range(num_steps):
        # Run the computations. We tell .run() that we want to run the optimizer,
        # and get the loss value and the training predictions returned as numpy
        # arrays.
        _, l, predictions = session.run([optimizer, loss, train_prediction])
        if step % 100 == 0:
            print('Loss at step %d: %f' % (step, l))
            print('Training accuracy: %.1f%%' % accuracy(
                predictions, train_labels[:train_subset, :]))
            # Calling .eval() on valid_prediction is basically like calling run(), but
            # just to get that one numpy array. Note that it recomputes all its graph
            # dependencies.
            print('Validation accuracy: %.1f%%' % accuracy(
                valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
'''SGD'''
print("-----------\nSGD UNSANITIZED")

# learning_rate = 0.01
batch_size = 128  # 63-66.3% test accuracy at learning_rate 0.01
# batch_size = 10000  # decent test accuracy of 74.2%
graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    # Placeholders are always fed; they take the dtype of the elements to be
    # fed, an optional tensor shape, and an optional name for the operation.
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
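    # A placeholder holds no data itself: every session.run() that touches one
    # must supply a value through feed_dict (as in the training loop below),
    # otherwise TensorFlow raises an error.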
    # Variables.
    weights = tf.Variable(
        tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    logits = tf.matmul(tf_train_dataset, weights) + biases
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=tf_train_labels))  # cross-entropy between logits and labels; expects unscaled logits, since it applies softmax internally for efficiency

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(
        tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 3001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
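        # e.g. with 200,000 training rows and batch_size 128, offsets cycle
        # through 0, 128, 256, ... and wrap around before the last partial
        # batch (the 200,000 figure is illustrative, not a guarantee).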
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                valid_prediction.eval(), valid_labels))
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
print("------\nSGD SANITIZED")

# batch_size = 128  # 59-63% test accuracy
# batch_size = 10000  # decent test accuracy of 71%
graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset_sanitized)
    tf_test_dataset = tf.constant(test_dataset_sanitized)

    weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))

    logits = tf.matmul(tf_train_dataset, weights) + biases
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=tf_train_labels))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(
        tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 3001
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()  # returns an Op that initializes global variables
    print("Initialized")
    for step in range(num_steps):
        # Offsets must wrap by the sanitized training-set size (it is smaller
        # than the full set); using train_labels.shape[0] here would over-run
        # the sanitized arrays.
        offset = (step * batch_size) % (train_labels_sanitized.shape[0] - batch_size)
        batch_data = train_dataset_sanitized[offset:(offset + batch_size), :]
        batch_labels = train_labels_sanitized[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                valid_prediction.eval(), valid_labels_sanitized))
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels_sanitized))
'''
Problem:
Turn the logistic regression example with SGD into a 1-hidden layer neural
network with rectified linear units nn.relu() and 1024 hidden nodes. This
model should improve your validation / test accuracy.
'''
print("-----\n1-HIDDEN-LAYER NN")
'''http://x-wei.github.io/dlMOOC_L2.html'''
batch_size = 128
num_hidden = 1024

learning_rate = 0.025

graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables for linear layer 1.
    W1 = tf.Variable(
        tf.truncated_normal([image_size * image_size, num_hidden]))
    b1 = tf.Variable(tf.zeros([num_hidden]))

    # Hidden ReLU input computation.
    y1 = tf.matmul(tf_train_dataset, W1) + b1
    # Hidden ReLU output computation.
    X1 = tf.nn.relu(y1)
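    # ReLU keeps positive inputs and zeroes out negatives, elementwise, e.g.
    # relu([-2.0, 0.0, 3.0]) -> [0.0, 0.0, 3.0]; this nonlinearity is what
    # makes the hidden layer more expressive than a purely linear model.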
    # Variables for linear layer 2.
    W2 = tf.Variable(
        tf.truncated_normal([num_hidden, num_labels]))
    b2 = tf.Variable(tf.zeros([num_labels]))
    # Logit (y2) output.
    logits = tf.matmul(X1, W2) + b2
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=tf_train_labels))

    def getlogits(X):
        y1 = tf.matmul(X, W1) + b1
        X1 = tf.nn.relu(y1)
        return tf.matmul(X1, W2) + b2
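    # getlogits() rebuilds the same two-layer computation on a different input
    # tensor while reusing the trained W1/b1/W2/b2 variables, so validation and
    # test predictions share the training weights.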
    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(getlogits(tf_valid_dataset))
    test_prediction = tf.nn.softmax(getlogits(tf_test_dataset))

# Run SGD optimization.
num_steps = 3001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                valid_prediction.eval(), valid_labels))
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
    print("done")